

    
   ***data paths 
   global archive /Volumes/Sans titre/DATA_MICROEU
   global posted /Volumes/Sans titre/DATA_MICROEU/Posting
   global data /Volumes/Sans titre/SIPSI/raw
  
   
   
   **output paths
  global replication_outputs_a /Users/Mathilde/Dropbox/Posted_workers/Replication/TNT_final/Replication_package/Appendix_figures
  global replication_estimates /Users/Mathilde/Dropbox/Posted_workers/Replication/TNT_final/Replication_package/Main_estimates
  global replication_estimates_a /Users/Mathilde/Dropbox/Posted_workers/Replication/TNT_final/Replication_package/Appendix_estimates
  global replication_outputs /Users/Mathilde/Dropbox/Posted_workers/Replication/TNT_final/Replication_package/Main_figures




*******************************************************************************
*     This dofile uses raw sipsi data to reproduce Figure 3 and Figure D25
******************************************************************************
 
 
     **********************************************
     *Importing raw SIPSI posting forms from DARES
	 *********************************************
	 
     import delimited "$data/pays.csv", delimiter(";") encoding(ISO-8859-1)clear
     rename v3 eu
     rename v1 iso
     replace eu="true" if iso=="GB"
     save "$data/iso.dta", replace

     import delimited "$data/dpd_sipsi.csv", clear    
     save "$data/dpd.dta", replace

   *SIPSI forms by posting contract
   
   import delimited "$data/Salaries_sipsi.csv", clear    
   gen debut=date(detach_date_debut,"DMY")
   gen fin=date(detach_date_fin,"DMY")
   gen contrat=date(date_signature_contrat,"DMY")
   format debut %td 
   format fin %td 
   format contrat %td
   g duration=fin-debut
   
   sum duration, d 
   sum duration if pays_residence=="PL", d
   sum duration if pays_residence=="ES", d   
   sum duration if pays_residence=="PT", d
   sum duration if pays_residence=="DE", d   
   sum duration if pays_residence=="BE", d
   sum duration if pays_residence=="RO", d   
   sum duration if pays_residence=="UK", d
   sum duration if pays_residence=="BG", d
   sum duration if pays_residence=="FR", d
   
   g year=substr(detach_date_debut,7,10)
   destring year, replace
   replace duration=1 if debut==fin
   g freq=1
   
   sum duration if year==2018, d 
   global mean=round(r(mean),0.01)
   global median=round(r(p50),0.01)

 
   
   **** weighted flows**** 
    
*There are discrepancies in the way foreign firms fill the wage variable. Most companies appear to fill monthly basis rates for their employees (85% fill wages > 1000 euros, including for contracts that last only one day). There is large bunching at the French monthly legal wage (1498 in 2018), but also smaller bunching at the French minimum legal wage (9.88/hour which is 1498/151/7). Since 2021 (outside this period of stuy), the SIPSI reporting system has been modified and ask explicitely companies to fill hourly wage rates, and to convert the monthly rates of their employees into gross hourly rates when filling the SIPSI form. 
 
*I convert the monthly equivalent basis rate into hourly equivalent when raw wage filled is more than 500euros,
*using the French legal basis of 151.7 hours per month that is used to compute the enforcement of the minimum legal wage (that is binding for foreign firms). 
 
  g salaire_r=.
  replace salaire_r=remuneration/151.7 if remuneration>500
  
*For wages below 200 per hours, I assume those are already hourly rates and leave the raw data (*Only 0.8% of observations have a reported wage between 100 and 200 euros)

  replace salaire_r=remuneration if remuneration<200
  
*Origin Wages in 200-500 range account for less than 1% of remaining cases-- do not take a stance and leave as missing 
 
 *winsorize to get rid of some extreme values driven by reporting mistakes (few hourly wages are in the million/billion euros, even though the workers are classified as blue collars)
  winsor salaire_r, p(0.01) gen(salaire_r_w)
  drop salaire_r
  rename salaire_r_w salaire_r
  
   
   *Create a weight based on hourly equivalent wage and duration of posting contract 
   g value=duration*salaire_r
   
   preserve
   foreach y in ouvrier DALLEUR Ouvrier ELECTRICIEN Plombier Electricien OUVRIER BATIMENT MACON FUMISTE MONTEUR ouvrier Monteur monteur Soudeur OUVRIER MACON PLOMBIER MECANICIEN{
   replace qualification="OUVRIER" if metier=="`y'"  & missing(qualification)
   }
   
   drop if missing(qualification)
   egen tot_v=sum(value) if qualification!=".", by(year)
   egen tot=sum(freq) if qualification!=".", by(year)
   collapse (sum) freq value (mean) tot_v tot, by(year qualification)
   g s=freq/tot
   g s_v=value/tot_v
   
   *Figure D25
   graph bar s_v s if year==2019, over(qualification, sort(qualification) relabel( 1 "Foremen" 3 "Employees" 2 "Managers" 4 "Blue Collar" 5 "Technicians")) ///
   graphregion(fcolor(white) lcolor(white)) legend(label(1 "% in Euro Value") label(2 "% in Posted Workers")) ///
   ylabel(0.2 "20%" 0.4 "40%" 0.6 "60%") 
   graph export "$replication_outputs_a/FigureD25.png", replace

   restore

   merge m:1 numenvoi using "$data/dpd.dta"
   save "$data/dpd_salarie.dta", replace

   use "$data/dpd_salarie.dta", clear
  
  *drop covid year 
   drop if year==2020
   preserve 
  
  *few missing sectors 
  drop if missing(secteur_redresse)
  egen tot_v=sum(value), by(year)
  egen tot=sum(freq), by(year)
   
  collapse (sum) freq value (mean) tot_v tot, by(year secteur_redresse)
   
   g s=freq/tot
   g s_v=value/tot_v
   sum s_v 
	 
   graph bar s_v s if year==2019 & secteur_redresse!="A recoder", over(secteur_redresse, sort(secteur_redresse)   relabel( 2 "Construction" 3 "Industrial Sevices" 4 "Other Services") ) ///
   graphregion(fcolor(white) lcolor(white)) legend(label(1 "% in Euro Value") label(2 "% in Posted Workers")) ///
   ylabel(0.2 "20%" 0.4 "40%" ) 
   
   restore
   
   preserve 
     collapse (sum) freq value, by(year secteur_redresse metier)
	 sort value secteur_redresse
     br if secteur_redresse=="Industrie"
	 br if secteur_redresse=="Service"
	 br if secteur_redresse=="Construction"
	 restore 

  drop if missing(secteur_redresse)
  egen tot_v=sum(value), by(year)
  egen tot=sum(freq), by(year)
   
  collapse (sum) freq value (mean) tot_v tot, by(year secteur_redresse)
   
   g s=freq/tot
   g s_v=value/tot_v
   sum s_v 
     
	 *Figure 3
	 
     graph bar s_v s if year==2019 & secteur_redresse!="A recoder", over(secteur_redresse, sort(secteur_redresse)   relabel( 2 "Construction" 3 "Industrial Services" 4 "Other Services") ) ///
   graphregion(fcolor(white) lcolor(white)) legend(label(1 "% in Euro Value") label(2 "% in Posted Workers")) ///
   ylabel(0.2 "20%" 0.4 "40%" )  ///
   ttext(0.15 2 "#1: Farm Worker", place(e) size(small) ) ///
      ttext(0.39 30 "#1: Builder", place(e) size(small) ) ///
     ttext(0.39 57 "#1: Welder", place(e) size(small) )  ///
     ttext(0.26 83 "#1: Truck Driver", place(e) size(small) )  ///
   title("Services Performed by Foreign Employees Posted to France", size(small)) 
      graph export "$replication_outputs/Figure3.pdf", replace
   
 